import scrapy
from bs4 import BeautifulSoup
from scrapy_redis.spiders import RedisSpider
from douban.items import DoubanItem


class MovieRankSpider(RedisSpider):
    """Spider that extracts movie name, info and quote from ranking rows.

    Each ``<tr class="item">`` row in the response is turned into one
    ``DoubanItem`` with the fields ``name``, ``info`` and ``quote``.
    """

    name = "movie_rank"
    # allowed_domains = ["movie.douban.com"]
    # start_urls = ["http://movie.douban.com/"]
    # NOTE(review): with RedisSpider the start URLs are presumably read from
    # the Redis key below instead of ``start_urls`` -- confirm against the
    # scrapy-redis configuration in use.
    redis_key = 'movie_rank:start_urls'

    @staticmethod
    def _text_or_empty(tag):
        """Return ``tag.text``, or an empty string when the tag was not found."""
        return tag.text if tag is not None else ""

    def parse(self, response, **kwargs):
        """Yield one ``DoubanItem`` per ``<tr class="item">`` row.

        Args:
            response: The downloaded page; ``response.text`` is parsed as HTML.
            **kwargs: Accepted for callback compatibility; not used here.

        Yields:
            DoubanItem: item with ``name`` (the ``title`` attribute of the
            row's second ``<a>``), ``info`` (text of ``<p class="pl">``) and
            ``quote`` (text of ``<span class="inq">``). ``info`` and
            ``quote`` fall back to an empty string when the element is
            missing; rows without a usable title are skipped.
        """
        # Log through the spider logger instead of printing the whole page
        # body to stdout on every response.
        self.logger.debug(
            "Parsing %s (request headers: %s)",
            response.url,
            response.request.headers,
        )

        # Parse the response's DOM tree with BeautifulSoup.
        soup = BeautifulSoup(response.text, "html.parser")

        for row in soup.find_all("tr", class_="item"):
            links = row.find_all("a")
            # The title lives on the second anchor of the row; guard against
            # rows that have fewer anchors or an anchor without a title so
            # one malformed row does not abort the remaining rows.
            title = links[1].get("title") if len(links) > 1 else None
            if title is None:
                self.logger.warning(
                    "Skipping row without a movie title on %s", response.url
                )
                continue

            item = DoubanItem()
            item["name"] = title
            # find() returns None when the element is absent (e.g. a movie
            # without a quote), so do not dereference .text unconditionally.
            item['info'] = self._text_or_empty(row.find("p", class_="pl"))
            item['quote'] = self._text_or_empty(row.find("span", class_="inq"))

            # Hand the item back to the engine.
            yield item


